Example Complex Pattern

ReggerIt is used to compose regular expression patterns that are compatible with the pattern parameter of the Regex class in the Microsoft .NET framework.

The example on this page demonstrates how to compose a pattern that extracts table names from a SQL query.

The composition is layered:

  • Definition of a label;
  • Definition of how a table name is constructed;
  • The usage of tables in a SQL query - in the "from" and "join" clauses.

With this approach, it is also very easy to "debug" a regex pattern. You can temporarily comment out parts of the pattern and test what remains, to check your expectations against the actual outcome. In the example below, you can test the "from" clause without the "join"s by commenting out the "join" part on line 36.

Another easy-maintenance example: if you change the definition of label - e.g. so that it must start with a letter - then you only have to change line 12.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
open ReggerIt
open System.Text.RegularExpressions

///  Cleanup an input string: culture-invariant lower-case, each whitespace run collapsed to one space, both ends trimmed
let cleanup (s:string) = Regex.Replace(s.ToLowerInvariant(), @"\s+", " ").Trim()

//  Build up a pattern that extracts table names from a SQL query string; matches are captured under this group name
let groupName = "table"

//  Tablename basics: a label is one or more Macro.wordCharacter, written either bare or wrapped in [ ]
let brackets s = [ Plain "["; s; Plain "]" ] |> List.reduce (+)
let label = Macro.wordCharacter |> OnceOrMore
let labelNotation = [ label; brackets label ] |> List.reduce (|||)

//  Notation basics: the separators and whitespace that sit between names
let dot = "." |> Plain
let white = Macro.whitespace |> OnceOrMore
let optWhite = Macro.whitespace |> ZeroOrMore
let commaSeperated = [ optWhite; Plain ","; optWhite ] |> List.reduce (+)

//  Tablename notations: only the final name part is wrapped in the named group; database/schema prefixes are matched but not captured
let tableName = labelNotation |> NamedGroup groupName
let ``database..table`` = [ labelNotation; dot; dot; tableName ] |> List.reduce (+)
let ``schema.table``    = [ labelNotation; dot; tableName ] |> List.reduce (+)
let ``database.schema.table`` = [ labelNotation; dot; labelNotation; dot; tableName ] |> List.reduce (+)
let table = [ ``database..table``; ``database.schema.table``; ``schema.table``; tableName ] |> List.reduce (|||)
let tableAlias = table + ([ white + Plain "as " + label; white + label ] |> List.reduce (|||) |> Optional)

//  from clause: one (optionally aliased) table, followed by any number of comma-separated further tables
let fromTables = tableAlias + (commaSeperated + tableAlias |> ZeroOrMore)

//  join clauses: inner/cross, or left/right/full optionally followed by "outer" (a bare "outer" is still accepted, as before)
let joinKind = Plain "inner" ||| Plain "cross" ||| Plain "outer" ||| ((Plain "left" ||| Plain "right" ||| Plain "full") + Optional(white + Plain "outer"))
let join = white + joinKind + white + Plain "join" + white + tableAlias
//  whole query: "select ", anything, "from ", the from-clause tables, then zero or more join clauses
let select = Plain "select " + ZeroOrMore(Macro.any) + Plain "from " + fromTables + ZeroOrMore(join)

//  Render the composed pattern to the string form that Regex.Match takes
let pattern = Convert.ToStringStartPattern select

let input = """
SELECT p.Name AS ProductName,
NonDiscountSales = (OrderQty * UnitPrice),
Discounts = ((OrderQty * UnitPrice) * UnitPriceDiscount)
FROM Production.Product AS p
INNER JOIN Sales.SalesOrderDetail AS sod
ON p.ProductID = sod.ProductID
ORDER BY ProductName DESC;
"""

let extract = Regex.Match(cleanup input, pattern)

extract.Groups.[groupName].Captures
|>  Seq.iter(fun capture -> capture.Value |> printfn "%s")

Here is how to download the package:

1: 
paket install ReggerIt
module ReggerIt
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
val cleanup : s:string -> string


  Cleanup an input string
val s : string
Multiple items
val string : value:'T -> string

--------------------
type string = System.String
Multiple items
type Regex =
  new : pattern:string -> Regex + 2 overloads
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member MatchTimeout : TimeSpan
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  ...

--------------------
Regex(pattern: string) : Regex
Regex(pattern: string, options: RegexOptions) : Regex
Regex(pattern: string, options: RegexOptions, matchTimeout: System.TimeSpan) : Regex
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator) : string
Regex.Replace(input: string, pattern: string, replacement: string) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions, matchTimeout: System.TimeSpan) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions, matchTimeout: System.TimeSpan) : string
System.String.ToLower() : string
System.String.ToLower(culture: System.Globalization.CultureInfo) : string
val groupName : string
val brackets : s:RexPatt -> RexPatt
val s : RexPatt
val Plain : string -> RexPatt
val label : RexPatt
val OnceOrMore : RexPatt -> RexPatt
module Macro

from ReggerIt
val wordCharacter : RexPatt
val labelNotation : RexPatt
val dot : RexPatt
val white : RexPatt
val whitespace : RexPatt
val optWhite : RexPatt
val ZeroOrMore : RexPatt -> RexPatt
val commaSeperated : RexPatt
val tableName : RexPatt
val NamedGroup : string -> RexPatt -> RexPatt
val ( database..table ) : RexPatt
val ( schema.table ) : RexPatt
val ( database.schema.table ) : RexPatt
val table : RexPatt
val tableAlias : RexPatt
val Optional : RexPatt -> RexPatt
val fromTables : RexPatt
val joinKind : RexPatt
val join : RexPatt
val select : RexPatt
val any : RexPatt
val pattern : string
module Convert

from ReggerIt
val ToStringStartPattern : RexPatt -> string
val input : string
val extract : Match
Regex.Match(input: string, pattern: string) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions, matchTimeout: System.TimeSpan) : Match
property Match.Groups: GroupCollection with get
module Seq

from Microsoft.FSharp.Collections
val iter : action:('T -> unit) -> source:seq<'T> -> unit
val c : Capture
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
property Capture.Value: string with get